from pathlib import Path
import numpy as np
import json
import pandas as pd
import plotly.express as px
import sys
# Add the parent directory to the module search path.
sys.path.append("..")
# Root folder (relative path) under which the "Explainers" results
# directory is looked up further below.
base_path = "../"
Functions to compute useful metrics on the CAV results of different approaches
def rank2n_cav(rank):
    """Return the number of CAVs described by a rank specification.

    When ``rank`` is a list its first entry is returned; any other value
    is handed back unchanged.
    """
    return rank[0] if isinstance(rank, list) else rank
def rank2size(row):
    """Return the size implied by the rank of one summary row.

    Args:
        row: A summary row (typically a pandas Series produced by
            ``DataFrame.apply(..., axis=1)``) exposing a "rank" entry and
            a ``dataset_size`` entry.

    Returns:
        The product of all entries when the rank is a list; otherwise
        ``rank * 13 * 3 * dataset_size``.
    """
    # On a pandas Series, ``row.rank`` resolves to the ``Series.rank``
    # method instead of the "rank" entry, so the entry must be read by
    # item access. Fall back to attribute access for row objects that are
    # not subscriptable by name (e.g. namedtuples).
    try:
        rank = row["rank"]
    except (TypeError, KeyError, IndexError):
        rank = row.rank

    if isinstance(rank, list):
        return np.prod(rank)
    # NOTE(review): 13 and 3 presumably are the fixed non-rank dimensions
    # (they also appear in the NTD4 check of full_dimension) -- confirm.
    return rank * 13 * 3 * row.dataset_size
def reducer_type_class(row):
    """Build the reducer label for a summary row.

    Rows whose ``reducer`` equals "NMF" are labelled "NMF"; every other
    row is labelled "NTD" followed by its ``dimension``.
    """
    is_nmf = row.reducer == "NMF"
    return "NMF" if is_nmf else f"NTD{row.dimension}"
def full_dimension(row):
    """Tell whether a row's factorization keeps the non-CAV modes at full size.

    Args:
        row: A summary row exposing ``reducer_type``, ``dataset_size`` and
            a "rank" entry (read by item access, because ``row.rank`` is a
            method on a pandas Series).

    Returns:
        True for "NMF" rows; for "NTD3"/"NTD4" rows, whether the rank
        entries after the first equal ``[39, dataset_size]`` or
        ``[13, 3, dataset_size]`` respectively; False for any other
        reducer type.
    """
    kind = row.reducer_type
    if kind == "NMF":
        return True
    if kind == "NTD3":
        return row["rank"][1:] == [39, row.dataset_size]
    if kind == "NTD4":
        return row["rank"][1:] == [13, 3, row.dataset_size]
    # Unknown reducer type: report it, and return an explicit False so the
    # resulting column stays boolean and can still be used as a mask.
    print(kind)
    return False
def classes2composers(classes):
    """Return the string form of ``classes``."""
    return f"{classes!s}"
def add_info_df(df):
    """Add the derived columns used by the analysis to ``df`` in place.

    Columns written: ``n_cavs``, ``rank_str``, ``reducer_type``,
    ``useful_cavs``, ``useful_cavs_perc``, ``full_dim`` and ``composers``.
    The assignment order matters: ``full_dim`` reads ``reducer_type`` and
    ``useful_cavs_perc`` reads ``useful_cavs`` and ``n_cavs``.
    """

    def per_row(func):
        # Evaluate ``func`` on every row of the frame as it currently
        # stands, including columns added by earlier assignments.
        return df.apply(func, axis=1)

    df["n_cavs"] = per_row(lambda r: rank2n_cav(r["rank"]))
    df["rank_str"] = per_row(lambda r: str(r["rank"]))
    df["reducer_type"] = per_row(reducer_type_class)
    df["useful_cavs"] = per_row(lambda r: len(r.suggested_CAVs))
    df["useful_cavs_perc"] = df["useful_cavs"] / df["n_cavs"]
    df["full_dim"] = per_row(full_dimension)
    df["composers"] = per_row(lambda r: classes2composers(r.classes))
# Collect the summary.json of every experiment folder under
# <base_path>/Explainers into one DataFrame.
list_of_summary = []
for exp_f in Path(base_path, "Explainers").iterdir():
    summary_path = exp_f / "summary.json"
    try:
        with open(summary_path, encoding="utf-8") as summary_file:
            summary = json.load(summary_file)
            list_of_summary.append(summary)
    except (OSError, ValueError):
        # Best-effort scan: skip entries that are not folders, folders
        # without a readable summary.json, and files that are not valid
        # JSON (json.JSONDecodeError is a ValueError). Anything else,
        # including KeyboardInterrupt, is allowed to propagate.
        continue
df = pd.DataFrame(list_of_summary)
add_info_df(df)
df.shape
(105, 20)
According to the average fidelity (averaged over the two composer classes), no factorization technique shows a clear advantage over the others.
# Strip plot of average fidelity against the number of CAVs, restricted to
# rows flagged as full-dimension, with one facet row per composer pair.
px.strip(
    data_frame=df.loc[df["full_dim"]],
    x="n_cavs",
    y="fidelity_avg",
    color="reducer_type",
    hover_data=["rank_str"],
    facet_col="composers",
    facet_col_wrap=1,
)
As an example, let's consider the 4-D NTD decomposition with ranks [4,13,3,186]. Its results are stored in the folder "layer4_r[4, 13, 3, 186][Chopin_Bach]"
The CAVs produced have conceptual sensitivities:
"CAV0": [ -0.0092, 0.0716 ],
"CAV1": [ 0.237, 0.603 ],
"CAV2": [ -0.491, 1.145 ],
"CAV3": [ 1.264, -0.689 ]
This means that CAVs 3,2,0 are useful to distinguish between Bach and Chopin. They are ordered according to the sensitivity difference.
Let's check CAV 3 (move the threshold slider to start the visualization).
The corresponding MIDI files can be listened to in the folder feature_midis (files feature3-0.mid, feature3-1.mid, ..., feature3-4.mid).
from IPython.display import IFrame
# Embed the HTML page saved for CAV 3 of the example experiment in an
# inline frame (the path is relative -- presumably to the notebook folder).
IFrame(src="layer4_r[4, 13, 3, 186][Chopin_Bach]/feature_imgs/3plotly.html", width=1500, height=1000)
Let's check CAV 2. This one pushes in the direction of Bach.
from IPython.display import IFrame
# Embed the HTML page saved for CAV 2 of the example experiment in an
# inline frame (the path is relative -- presumably to the notebook folder).
IFrame(src="layer4_r[4, 13, 3, 186][Chopin_Bach]/feature_imgs/2plotly.html", width=1500, height=1000)
Let's check CAV 0. This one is not as easy to interpret, but its average concept presence is very low, so it should not be very significant.
from IPython.display import IFrame
# Embed the HTML page saved for CAV 0 of the example experiment in an
# inline frame (the path is relative -- presumably to the notebook folder).
IFrame(src="layer4_r[4, 13, 3, 186][Chopin_Bach]/feature_imgs/0plotly.html", width=1500, height=1000)
Other experiments can be found in the other folders. The rank and composers are specified in the name of the folder.